Import everything we need

In [1]:
import pandas as pd
import numpy as np
import pickle

# Widen the pandas console display so wide frames are easier to inspect
pd.options.display.width = 1000

In [2]:
# Reload the trained model
tlo_classifier_file = "models/tlo_lr_classifier_02.18.16.dat"

# NOTE: unpickling can execute arbitrary code, so only load model files that
# come from a trusted source.
# The context manager closes the file handle as soon as the model is loaded,
# instead of leaving it open until garbage collection.
with open(tlo_classifier_file, "rb") as model_file:
    logClassifier = pickle.load(model_file)

LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=111, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

Clean the new data

In [3]:
tlo_data_file = 'data/tlo_check_07_28_15_check_scores_anonymized.csv'
raw_data = pd.DataFrame.from_csv(tlo_data_file, 

full_name_check_value last_name_check_value ssn_score dob_score n1_score n2_score n3_score n4_score n5_score n6_score ... n10_score n11_score n12_score n13_score n14_score verified ssn_match dob_match name_match failure_explanation
17076 1 1 300 186 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN 0 1 0 1 DOB
16744 0 0 0 0 7 7 7 7 7 7 ... 6 6 6 6 6 0 0 0 0 SSN DOB NAME
42421 1 1 300 225 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN 0 1 0 1 DOB
42641 1 1 266 263 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN 0 0 0 1 SSN DOB
43253 0 0 0 0 27 27 27 27 27 27 ... 22 22 22 22 22 0 0 0 0 SSN DOB NAME

5 rows × 23 columns

In [4]:
# Normalise the free-text failure explanations to lowercase so they match
# the labels the encoder compares against.
failure_text = raw_data['failure_explanation']
raw_data['failure_explanation'] = failure_text.str.lower()

In [5]:
# Failure Explanations: 'dob', 'name', 'ssn dob name', 'ssn', 'ssn name', 'ssn dob','dob name', nan
def update_failure_explanations(type):
    """Translate a failure-explanation label into its integer code.

    Args:
        type: The label to encode. It is compared by equality against the
            known lowercase labels listed in ``known_labels``.

    Returns:
        The integer code (0-6) for a known label, or ``None`` when the value
        matches none of them (for example NaN or an unexpected string).
    """
    # A label's position in this tuple is its numeric code.
    known_labels = (
        'dob',
        'name',
        'ssn dob name',
        'ssn',
        'ssn name',
        'ssn dob',
        'dob name',
    )
    for code, label in enumerate(known_labels):
        if type == label:
            return code
    return None

In [6]:
# Convert all strings to numerics
# Values the mapper does not recognise (including NaN) come back as None.
explanation_codes = raw_data['failure_explanation'].apply(update_failure_explanations)
raw_data['failure_explanation'] = explanation_codes

full_name_check_value last_name_check_value ssn_score dob_score n1_score n2_score n3_score n4_score n5_score n6_score ... n10_score n11_score n12_score n13_score n14_score verified ssn_match dob_match name_match failure_explanation
17076 1 1 300 186 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN 0 1 0 1 0
16744 0 0 0 0 7 7 7 7 7 7 ... 6 6 6 6 6 0 0 0 0 2
42421 1 1 300 225 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN 0 1 0 1 0
42641 1 1 266 263 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN 0 0 0 1 5
43253 0 0 0 0 27 27 27 27 27 27 ... 22 22 22 22 22 0 0 0 0 2

5 rows × 23 columns

In [7]:
# Handle missing values
# Rebind the name rather than mutating in place: inplace=True brings no
# benefit here and makes the cell harder to reason about on re-runs.
# NOTE(review): rows without a failure explanation also become 0 here, the
# same code update_failure_explanations assigns to 'dob' - confirm this
# matches how the model was trained.
raw_data = raw_data.fillna(0)

full_name_check_value last_name_check_value ssn_score dob_score n1_score n2_score n3_score n4_score n5_score n6_score ... n10_score n11_score n12_score n13_score n14_score verified ssn_match dob_match name_match failure_explanation
17076 1 1 300 186 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 1 0
16744 0 0 0 0 7 7 7 7 7 7 ... 6 6 6 6 6 0 0 0 0 2
42421 1 1 300 225 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 1 0
42641 1 1 266 263 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 5
43253 0 0 0 0 27 27 27 27 27 27 ... 22 22 22 22 22 0 0 0 0 2

5 rows × 23 columns

In [8]:
# Reorder the columns for splitting
# cols = list(raw_data)
# cols.insert(len(raw_data.columns)-1, cols.pop(cols.index('verified')))
# raw_data = raw_data.ix[:, cols]

cols = ['full_name_check_value',
raw_data= raw_data[cols]

full_name_check_value ssn_score dob_score n1_score n2_score n3_score n4_score n5_score n6_score n7_score ... n11_score n12_score n13_score n14_score ssn_match dob_match name_match failure_explanation last_name_check_value verified
17076 1 300 186 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 1 0 1 0
16744 0 0 0 7 7 7 7 7 7 7 ... 6 6 6 6 0 0 0 2 0 0
42421 1 300 225 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 1 0 1 0
42641 1 266 263 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 5 1 0
43253 0 0 0 27 27 27 27 27 27 27 ... 22 22 22 22 0 0 0 2 0 0

5 rows × 23 columns

In [9]:
# Split the dataset between features and targets
# The first 22 columns are used as the feature matrix; the 'verified' column
# supplies the target vector.
feature_frame = raw_data.iloc[:, :22]
tlo_data = feature_frame.values
tlo_targets = raw_data['verified'].values

Use the model to create the predictions

In [10]:
# tlo_data
# Make a prediction for each item in our data
for item in tlo_data:

/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
/Users/robert.dempsey/anaconda/lib/python3.5/site-packages/sklearn/utils/ DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.

In [ ]: